import numpy as np
import re
import scipy.stats as stats
from decimal import Decimal


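# Expects a LaTeX table of the form below. In each data row the first cell is
# a label, the second cell is the baseline "<mean>\pm<std>", and the following
# "<mean>\pm<std>" cells are each compared against that baseline.
# <heading text, which is skipped>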
# \textbf{1-in-linear} & $3.6\pm 0.1$ & $8.8\pm 3.6$& $48\pm 0.1$ & $ 1.60\mathrm{e}-02$ & $1.24\mathrm{e}-11$ \\
# \textbf{2-in-linear} & $2.6\pm 2.0$ & $27\pm4.0$& $50\pm 2.0$ &  $ 1.30\mathrm{e}-04$ & $1.51\mathrm{e}-06$ \\

def strip_mean_std(token):
	r"""Parse one LaTeX table cell of the form ``<mean>\pm<std>`` into two floats.

	Every character other than the digits 0-9 and '.' is discarded from each
	side before conversion, so math delimiters and whitespace are ignored.
	Note that this also drops minus signs and exponent markers.

	Args:
		token: text of a single table cell.

	Returns:
		A ``(mean, std)`` tuple of floats.

	Raises:
		ValueError: if the cell does not split into exactly two parts around
			``\pm``, or if either part has no parsable number left.
	"""
	non_numeric = r"[^0-9.]"
	# unpacking fails with ValueError unless there is exactly one \pm
	mean_text, std_text = token.split('\\pm')
	mean_value = float(re.sub(non_numeric, "", mean_text))
	std_value = float(re.sub(non_numeric, "", std_text))
	return mean_value, std_value

def _row_p_values(line, num_trials):
	# Computes the formatted p-values for one table row; returns an empty list when the row has nothing to compare.
	cells = line.split("&")
	if len(cells) < 2:
		return []
	try:
		# cells[0] is the row label; cells[1] is the baseline every later cell is compared to
		base_mean, base_std = strip_mean_std(cells[1])
	except ValueError:
		# heading (or any row whose second cell is not "<mean>\pm<std>"): nothing to compare
		return []
	# stds are floored at 0.001 so that two zero spreads cannot cause a division by zero
	min_std = 0.001
	base_var = max(min_std, base_std) ** 2
	p_values = []
	for cell in cells[2:]:
		try:
			null_mean, null_std = strip_mean_std(cell)
		except ValueError:
			# the first cell that is not "<mean>\pm<std>" (e.g. an existing p-value column) ends the row
			break
		std_err = np.sqrt((base_var + max(min_std, null_std) ** 2) / num_trials)
		t_val = (base_mean - null_mean) / std_err
		# one-sided tail probability of |t| with num_trials - 1 degrees of freedom
		p_values.append('%.2E' % stats.t.sf(np.abs(t_val), num_trials - 1))
	return p_values


def perform_t_test(pth, num_trials):
	r"""Print t-test p-values comparing each row's baseline cell with its later cells.

	Each line of the file is split on '&'. The second cell is parsed as the
	baseline ``<mean>\pm<std>``; every following cell is parsed the same way
	and compared with the baseline until a cell fails to parse. For each
	comparison the statistic is

		t = (base_mean - mean) / sqrt((base_std**2 + std**2) / num_trials)

	and the reported value is ``stats.t.sf(|t|, num_trials - 1)`` formatted
	with ``%.2E``. Lines with fewer than two cells, lines whose baseline cell
	does not parse, and rows that yield no comparison are skipped.

	A row is reported whenever it produced at least one comparison, whether
	or not it is followed by cells that fail to parse.

	Args:
		pth: path of the text file holding the table rows.
		num_trials: number of trials behind every mean/std; it is used both
			for the standard error and, minus one, as the degrees of freedom,
			so it should be at least 2.

	Returns:
		None. One line per reported row is printed, with the p-values joined
		by "$ & $" and prefixed by "$ ".
	"""
	outcomes = []
	with open(pth, 'r') as f:
		for line in f:
			p_values = _row_p_values(line, num_trials)
			if p_values:
				outcomes.append(p_values)
	for ol in outcomes:
		# print out the interior lines
		print("$ "+ "$ & $".join(ol))

if __name__ == "__main__":
	# Script entry point: analyse ./table.txt (relative to the working directory)
	# with 5 trials per cell. Guarded so that importing this module does not
	# trigger file I/O.
	perform_t_test("./table.txt", 5)